import requests
import re
import csv
from useragent_helper import get_random_ug


def ask_page():
    """Ask the user which page to fetch and return the matching offset.

    The prompt is repeated until the user enters a positive integer
    (1, 2, 3, ...). Anything else prints an error message and asks again.

    Returns:
        int: ``(page - 1) * 25`` for the 1-based page number entered, so
        page 1 maps to 0, page 2 to 25, and so on.
    """
    while True:
        ans = input("你想要第几页: ")
        # isdecimal() (unlike isdigit()) rejects characters such as '²'
        # that int() cannot parse, so the conversion below never raises.
        # Page 0 is rejected as well: it would produce an offset of -25.
        if ans.isdecimal() and int(ans) >= 1:
            return (int(ans) - 1) * 25
        print("您的输入不合法，请输入一个正整数。")


def load_page(_page=0, timeout=10):
    """Download one page of the Douban Top 250 list and return its source.

    Args:
        _page: Value substituted into the ``start`` query parameter of the
            URL. Defaults to 0.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``. Defaults to 10.

    Returns:
        str: The response body as text.

    Raises:
        requests.RequestException: If the request times out, the connection
            fails, or the server answers with a 4xx/5xx status.
    """
    url = "https://movie.douban.com/top250?start=%s&filter=" % _page
    # get_random_ug() is a project helper; presumably it returns a random
    # User-Agent string -- confirm against useragent_helper.
    # Without a timeout, requests.get() can block forever on a stalled server.
    resp = requests.get(
        url,
        headers={'user-agent': get_random_ug()},
        timeout=timeout,
    )
    # Fail loudly on an HTTP error instead of handing an error page to the
    # parser, which would silently produce an empty result.
    resp.raise_for_status()
    return resp.text


def save_page(_page_content):
    """Parse movie records out of the page source, print and save them.

    A regular expression extracts four fields per movie entry: ``title``,
    ``year``, ``score`` and ``population``. Each record is printed as a
    dict and written as one row to ``data.csv`` (gbk-encoded, overwritten
    on every call) in the current working directory.

    Args:
        _page_content: Page source (HTML text) to parse.

    Returns:
        None.
    """
    # re.S lets '.' span newlines, since each entry covers several lines.
    pattern = re.compile(
        r'<li>.*?<div class="item">.*?<span class="title">(?P<title>.*?)</span>.*?' +
        r'<p class="">.*?<br>(?P<year>.*?)&nbsp.*?' +
        r'<span class="rating_num" property="v:average">(?P<score>.*?)</span>.*?' +
        r'<span>(?P<population>.*?)</span>', flags=re.S)
    # The context manager guarantees the file is closed even if writing a
    # row raises (e.g. a character that cannot be encoded as gbk).
    with open('data.csv', mode='w', encoding='gbk', newline='') as f:
        csv_writer = csv.writer(f)
        print("页面信息如下：")
        for match in pattern.finditer(_page_content):
            dic = match.groupdict()
            # The captured year is surrounded by whitespace/newlines.
            dic['year'] = dic['year'].strip()
            print(dic)
            # Column order follows the named-group order in the pattern.
            csv_writer.writerow(dic.values())


if __name__ == '__main__':
    # Script entry point: ask for a page, download it, then parse and save it.
    html = load_page(ask_page())
    print("页面源代码下载成功。")
    save_page(html)
    # Closing status messages, printed in order.
    for closing_message in ("成功将该页面信息保存到data.csv。", "程序已结束。"):
        print(closing_message)
